This Rmd includes code for creating “weightings” for the visits data, and starting to compare with case data, for Alameda County.

library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)
library(rvest)
library(RSelenium)
library(seleniumPipes)
library(dLagM)
library(jsonlite)
library(rgdal)
library(esri2sf)
library(readr)
library(arsenal)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

# Load the processing functions. This also loads the normalization functions.
source("safegraph_process_patterns_functions.R")

sg_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/'

sg_hourly_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/weekly-patterns/v2/hourly/'

Load the hourly visits data.

# # load places and weights
# bay_hourly_visits_places_309_322 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_3-09_3-22.rds"))
# bay_hourly_visits_places_323_405 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_3-23_4-05.rds"))
# bay_hourly_visits_places_406_412 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_4-06_4-12.rds"))
# bay_hourly_visits_places_413_426 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_4-13_4-26.rds"))
# bay_hourly_visits_places_427_510 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_4-27_5-10.rds"))
# bay_hourly_visits_places_511_524 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_5-11_5-24.rds"))
# bay_hourly_visits_places_525_531 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_5-25_5-31.rds"))
# bay_hourly_visits_places_601_607 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_6-01_6-07.rds"))
# bay_hourly_visits_places_608_621 <- readRDS(paste0(sg_hourly_path, "bay_hourly_visits_places_6-08_6-21.rds"))
# 
# # load visits by origin
# ac_hourly_visits_zip_309_322 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_3-09_3-22.rds"))
# ac_hourly_visits_zip_323_405 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_3-23_4-05.rds"))
# ac_hourly_visits_zip_406_412 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_4-06_4-12.rds"))
# ac_hourly_visits_zip_413_426 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_4-13_4-26.rds"))
# ac_hourly_visits_zip_427_510 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_4-27_5-10.rds"))
# ac_hourly_visits_zip_511_524 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_5-11_5-24.rds"))
# ac_hourly_visits_zip_525_531 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_5-25_5-31.rds"))
# ac_hourly_visits_zip_601_607 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_6-01_6-07.rds"))
# ac_hourly_visits_zip_608_621 <- readRDS(paste0(sg_hourly_path, "ac_hourly_visits_zip_6-08_6-21.rds"))

Process to get weighted visits.

# # input the visits from AC and the POI visits data for the whole Bay for the same date range, and the population by zip code
# getWeightedVisits <- function(ac_hourly_visits_zip, bay_hourly_visits_places) {
#   ac_hourly_visits_zip_weights <- left_join(ac_hourly_visits_zip, bay_hourly_visits_places %>% dplyr::select(safegraph_place_id, hour, date, total_visits_hourly_avg, area_square_feet, hourly_visits_per_area, weekly_median_dwell) %>% rename(place_visits_per_area = hourly_visits_per_area, place_total_visits_avg = total_visits_hourly_avg)) %>%
#   mutate(weighted_visits = total_visits_avg * place_visits_per_area,
#          visit_hours = total_visits_avg * weekly_median_dwell/60, # dwell is in minutes, convert to hours
#          visits_per_area = total_visits_avg / area_square_feet,
#          weighted_visit_hours = visit_hours * place_visits_per_area,
#          weighted_visit_hours_v2 = visit_hours * place_visits_per_area * weekly_median_dwell/60 # multiply the place visits by dwell time to also convert them to visit hours, also converting to hours again
#   )
#   return(ac_hourly_visits_zip_weights)
# }
# 
# # run this for all the visits data
# ac_hourly_visits_zip_weights_309_322 <- getWeightedVisits(ac_hourly_visits_zip_309_322, bay_hourly_visits_places_309_322)
# ac_hourly_visits_zip_weights_323_405 <- getWeightedVisits(ac_hourly_visits_zip_323_405, bay_hourly_visits_places_323_405)
# ac_hourly_visits_zip_weights_406_412 <- getWeightedVisits(ac_hourly_visits_zip_406_412, bay_hourly_visits_places_406_412)
# ac_hourly_visits_zip_weights_413_426 <- getWeightedVisits(ac_hourly_visits_zip_413_426, bay_hourly_visits_places_413_426)
# ac_hourly_visits_zip_weights_427_510 <- getWeightedVisits(ac_hourly_visits_zip_427_510, bay_hourly_visits_places_427_510)
# ac_hourly_visits_zip_weights_511_524 <- getWeightedVisits(ac_hourly_visits_zip_511_524, bay_hourly_visits_places_511_524)
# ac_hourly_visits_zip_weights_525_531 <- getWeightedVisits(ac_hourly_visits_zip_525_531, bay_hourly_visits_places_525_531)
# ac_hourly_visits_zip_weights_601_607 <- getWeightedVisits(ac_hourly_visits_zip_601_607, bay_hourly_visits_places_601_607)
# ac_hourly_visits_zip_weights_608_621 <- getWeightedVisits(ac_hourly_visits_zip_608_621, bay_hourly_visits_places_608_621)
# 
# # summarize for each day and create two data frames
# summarizeWeightedVisits <- function(ac_hourly_visits_zip_weights) {
#   ac_daily_weighted_visits <- ac_hourly_visits_zip_weights %>%
#     mutate(weighted_visits = ifelse(is.na(area_square_feet), 0, weighted_visits),
#            weighted_visit_hours = ifelse(is.na(area_square_feet), 0, weighted_visit_hours),
#            weighted_visit_hours_v2 = ifelse(is.na(area_square_feet), 0, weighted_visit_hours_v2),
#            visits_per_area = ifelse(is.na(area_square_feet), 0, visits_per_area)) %>% # handle any values that have NAs for the area of the POI, because this introduces NAs in the weighted values and visits per area. this doesn't happen often, so shouldn't affect overall results, but I do this to prevent the NAs from causing problems later
#     group_by(zipcode, date) %>%
#     summarize(total_weighted_visits = sum(weighted_visits),
#               total_visit_hours = sum(visit_hours),
#               total_visits_per_area = sum(visits_per_area),
#               total_weighted_visit_hours = sum(weighted_visit_hours),
#               total_visits = sum(total_visits_avg),
#               total_weighted_visit_hours_v2 = sum(weighted_visit_hours_v2))
# 
#   return(ac_daily_weighted_visits)
# }
# 
# ac_weighted_visits_zip_1 <- rbind(summarizeWeightedVisits(ac_hourly_visits_zip_weights_309_322), summarizeWeightedVisits(ac_hourly_visits_zip_weights_323_405), summarizeWeightedVisits(ac_hourly_visits_zip_weights_406_412), summarizeWeightedVisits(ac_hourly_visits_zip_weights_413_426), summarizeWeightedVisits(ac_hourly_visits_zip_weights_427_510), summarizeWeightedVisits(ac_hourly_visits_zip_weights_511_524)) %>%
#   filter(!is.na(zipcode))
# 
# ac_weighted_visits_zip_2 <- rbind(summarizeWeightedVisits(ac_hourly_visits_zip_weights_525_531), summarizeWeightedVisits(ac_hourly_visits_zip_weights_601_607), summarizeWeightedVisits(ac_hourly_visits_zip_weights_608_621)) %>%
#   filter(!is.na(zipcode))
# 
# ac_weighted_visits_zip <- rbind(ac_weighted_visits_zip_1, ac_weighted_visits_zip_2)
# 
# # save the RDS files
# saveRDS(ac_weighted_visits_zip_1, paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_weighted_03-09-20_05-24-20.rds"))
# saveRDS(ac_weighted_visits_zip_2, paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_weighted_05-25-20_06-21-20.rds"))

# load the visits data, weighted
ac_weighted_visits_zip_1 <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_weighted_03-09-20_05-24-20.rds"))
ac_weighted_visits_zip_2 <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_weighted_05-25-20_06-21-20.rds"))
ac_weighted_visits_zip <- rbind(ac_weighted_visits_zip_1, ac_weighted_visits_zip_2)

# load the visits data, unweighted
ac_daily_visits_zip_1 <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_03-09-20_05-24-20.rds"))
ac_daily_visits_zip_2 <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_05-25-20_06-21-20.rds"))
ac_daily_visits_zip <- rbind(ac_daily_visits_zip_1, ac_daily_visits_zip_2)

# compare to make sure that the weighted visits data "total visits" column is the same as the unweighted visits
summary(comparedf(ac_daily_visits_zip %>% dplyr::select(date, zipcode, total_visits_avg) %>% rename(total_visits = total_visits_avg) %>% mutate(total_visits = round(total_visits, 3)) %>% arrange(zipcode), ac_weighted_visits_zip %>% dplyr::select(date, zipcode, total_visits) %>% mutate(total_visits = round(total_visits, 3)) %>% arrange(zipcode)))
## 
## 
## Table: Summary of data.frames
## 
## version   arg                                                                                                                                                                                                  ncol   nrow
## --------  --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------  -----  -----
## x         ac_daily_visits_zip %>% dplyr::select(date, zipcode, total_visits_avg) %>%     rename(total_visits = total_visits_avg) %>% mutate(total_visits = round(total_visits,     3)) %>% arrange(zipcode)       3   5145
## y         ac_weighted_visits_zip %>% dplyr::select(date, zipcode, total_visits) %>%     mutate(total_visits = round(total_visits, 3)) %>% arrange(zipcode)                                                        3   5145
## 
## 
## 
## Table: Summary of overall comparison
## 
## statistic                                                      value
## ------------------------------------------------------------  ------
## Number of by-variables                                             0
## Number of non-by variables in common                               3
## Number of variables compared                                       3
## Number of variables in x but not y                                 0
## Number of variables in y but not x                                 0
## Number of variables compared with some values unequal              0
## Number of variables compared with all values equal                 3
## Number of observations in common                                5145
## Number of observations in x but not y                              0
## Number of observations in y but not x                              0
## Number of observations with some compared variables unequal        0
## Number of observations with all compared variables equal        5145
## Number of values unequal                                           0
## 
## 
## 
## Table: Variables not shared
## 
## |                        |
## |:-----------------------|
## |No variables not shared |
## 
## 
## 
## Table: Other variables not compared
## 
## |                                |
## |:-------------------------------|
## |No other variables not compared |
## 
## 
## 
## Table: Observations not shared
## 
## |                           |
## |:--------------------------|
## |No observations not shared |
## 
## 
## 
## Table: Differences detected by variable
## 
## var.x          var.y            n   NAs
## -------------  -------------  ---  ----
## date           date             0     0
## zipcode        zipcode          0     0
## total_visits   total_visits     0     0
## 
## 
## 
## Table: Differences detected
## 
## |                        |
## |:-----------------------|
## |No differences detected |
## 
## 
## 
## Table: Non-identical attributes
## 
## |                            |
## |:---------------------------|
## |No non-identical attributes |

The output above checks that the “total visits” column in the weighted visits data is the same as the unweighted visits data, which it is, so that is good.

Comparing the weighted and unweighted visits in their predictive ability for cases.

# load the case data
ac_place_cases <- read.csv("Simone_Speizer/Alameda_County_Cumulative_Cases_By_Place_And_Zip.csv")

# handle the ones that are reported as <10 by calling them 5 cases. Note this is a simplification/choice I made and could be changed.
ac_cases_zip <- ac_place_cases %>% 
  rename(date = DtCreate) %>%
  mutate(date = date %>%  substr(1,10) %>% as.Date()) %>%
  dplyr::select(c(date, contains("F9"))) %>% # only select zip code data
  gather(key = "zipcode", value = "cases", -date) %>%
  mutate(cases = ifelse(cases == "<10", "5", cases), 
         zipcode = zipcode %>% substr(2,6)) %>% # replace cases <10 with 10 and remove the "F" from zipcode names
  mutate(cases = as.numeric(cases)) %>%
  arrange(date)

# get Alameda County populations by zip code
# census data
acs_vars = readRDS("Simone_Speizer/censusData2018_acs_acs5.rds")

# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
    regionString <- paste0("state:06+county:", county)
    censusData <- getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "block group:*", 
      regionin = regionString,
      vars = variableToPull
    ) %>%
    mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
      select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
  
  return(censusData)
}

# load block groups and their correspondence to ZCTAs
ac_bg_zctas <- readRDS("Simone_Speizer/ac_bg_ztcas.rds")

ac_bgs <- ac_bg_zctas %>% pull(blockgroup)

# get population data
ac_fips <- fips("CA", "Alameda") %>% substr(3,5)
ac_pop_zip <- pullCensus("B01003_001E", ac_fips) %>% 
  left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_pop_zip = sum(B01003_001E))

# join with cases
ac_cases_zip <- ac_cases_zip %>% left_join(ac_pop_zip) %>%
  mutate(cases_by_pop = cases / total_pop_zip)

# get visits per capita
ac_weighted_visits_zip <- ac_weighted_visits_zip %>% left_join(ac_pop_zip) %>%
  mutate(weighted_visits_per_capita = total_weighted_visits / total_pop_zip,
         weighted_visit_hours_per_capita = total_weighted_visit_hours / total_pop_zip,
          weighted_visit_hours_per_capita_v2 = total_weighted_visit_hours_v2 / total_pop_zip,
         visits_per_capita = total_visits / total_pop_zip,
         visit_hours_per_capita = total_visit_hours / total_pop_zip,
         visits_per_area_per_capita = total_visits_per_area / total_pop_zip)

Compare these metrics.

Definitions

Weighted visits = weighted by the total number of visits per hour and the place area

Visit-hours = weighted by the median dwell time for each POI

Weighted visit-hours = visit hours also weighted by the total number of visits per hour and the place area

Comparison for change from date

Here I try using change in cases from 4/15 through 6/23 and visits from 4/1 through 6/9.

case_start <- as.Date("2020-04-15")
case_end <- as.Date("2020-06-23")
visits_start <- as.Date("2020-04-01")
visits_end <- as.Date("2020-06-09")
  
ac_init_cases <- ac_cases_zip %>% 
  filter(date == case_start) %>%
  dplyr::select(zipcode, cases_by_pop) %>%
  rename(init_cases_by_pop = cases_by_pop) %>%
  mutate(log_init_cases_by_pop = log(init_cases_by_pop))

# max cases (end date)
ac_final_cases <- ac_cases_zip %>% 
  filter(date == case_end) %>%
  dplyr::select(zipcode, cases_by_pop, total_pop_zip) %>%
  rename(fin_cases_by_pop = cases_by_pop) %>%
  mutate(log_fin_cases_by_pop = log(fin_cases_by_pop))

# summarize visits add current and initial cases
ac_visits_cases_change <- ac_weighted_visits_zip %>%
  filter(date >= visits_start & date <= visits_end) %>%
  filter(!is.na(zipcode) & !is.na(total_visits)) %>%
  group_by(zipcode) %>%
  summarize(total_visits_per_capita = sum(visits_per_capita),
            total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
            total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
            total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
            total_visit_hours_per_capita = sum(visit_hours_per_capita),
            total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
            total_visits = sum(total_visits)) %>%
  left_join(ac_final_cases) %>%
  left_join(ac_init_cases) %>%
  filter(init_cases_by_pop != 0) %>% 
  filter(!is.na(fin_cases_by_pop)) %>%
  mutate(change_log_cases_by_pop = log_fin_cases_by_pop - log_init_cases_by_pop,
         change_cases_by_pop = fin_cases_by_pop - init_cases_by_pop)

# get linear model values
ac_visits_cases_change_log_model <- lm(change_log_cases_by_pop ~ total_visits_per_capita, ac_visits_cases_change)
ac_visits_cases_change_model <- lm(change_cases_by_pop ~ total_visits_per_capita, ac_visits_cases_change)

ac_visits_cases_change_log_model_hours <- lm(change_log_cases_by_pop ~ total_visit_hours_per_capita, ac_visits_cases_change)
ac_visits_cases_change_model_hours <- lm(change_cases_by_pop ~ total_visit_hours_per_capita, ac_visits_cases_change)

ac_wt_visits_cases_change_log_model <- lm(change_log_cases_by_pop ~ total_weighted_visits_per_capita, ac_visits_cases_change)
ac_wt_visits_cases_change_model <- lm(change_cases_by_pop ~ total_weighted_visits_per_capita, ac_visits_cases_change)

ac_wt_visits_cases_change_log_model_hours <- lm(change_log_cases_by_pop ~ total_weighted_visit_hours_per_capita, ac_visits_cases_change)
ac_wt_visits_cases_change_model_hours <- lm(change_cases_by_pop ~ total_weighted_visit_hours_per_capita, ac_visits_cases_change)

  ac_visits_cases_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County (only non-zero starting cases)")
  summary(ac_visits_cases_change_model)
## 
## Call:
## lm(formula = change_cases_by_pop ~ total_visits_per_capita, data = ac_visits_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0023390 -0.0014143 -0.0004434  0.0003204  0.0092913 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             -1.715e-03  1.523e-03  -1.126   0.2662  
## total_visits_per_capita  9.096e-05  3.595e-05   2.530   0.0151 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002311 on 44 degrees of freedom
## Multiple R-squared:  0.127,  Adjusted R-squared:  0.1072 
## F-statistic: 6.402 on 1 and 44 DF,  p-value: 0.01506
  ac_visits_cases_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County (only non-zero starting cases)")
  summary(ac_wt_visits_cases_change_model)
## 
## Call:
## lm(formula = change_cases_by_pop ~ total_weighted_visits_per_capita, 
##     data = ac_visits_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0034666 -0.0012542 -0.0000626  0.0003395  0.0075941 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                      -0.003798   0.001766  -2.151   0.0370 * 
## total_weighted_visits_per_capita  0.020909   0.006215   3.364   0.0016 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002206 on 44 degrees of freedom
## Multiple R-squared:  0.2046, Adjusted R-squared:  0.1865 
## F-statistic: 11.32 on 1 and 44 DF,  p-value: 0.001599
  ac_visits_cases_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visits_cases_change_model_hours), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model_hours)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County (only non-zero starting cases)")
  summary(ac_visits_cases_change_model_hours)
## 
## Call:
## lm(formula = change_cases_by_pop ~ total_visit_hours_per_capita, 
##     data = ac_visits_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0024539 -0.0011830 -0.0006157  0.0003578  0.0086581 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  -5.948e-03  2.444e-03  -2.434  0.01908 * 
## total_visit_hours_per_capita  1.979e-04  6.001e-05   3.298  0.00193 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002215 on 44 degrees of freedom
## Multiple R-squared:  0.1982, Adjusted R-squared:   0.18 
## F-statistic: 10.88 on 1 and 44 DF,  p-value: 0.001932
  ac_visits_cases_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~change_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visits_cases_change_model_hours), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model_hours)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County (only non-zero starting cases)")
  summary(ac_wt_visits_cases_change_model_hours)
## 
## Call:
## lm(formula = change_cases_by_pop ~ total_weighted_visit_hours_per_capita, 
##     data = ac_visits_cases_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0022628 -0.0014172 -0.0007237  0.0002259  0.0095945 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                           0.0018705  0.0008607   2.173   0.0352 *
## total_weighted_visit_hours_per_capita 0.0004724  0.0021608   0.219   0.8280  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002472 on 44 degrees of freedom
## Multiple R-squared:  0.001085,   Adjusted R-squared:  -0.02162 
## F-statistic: 0.04779 on 1 and 44 DF,  p-value: 0.828

That’s really interesting–either weighted visits or visit-hours does better, but weighted visit hours does not. Weird. Try with change in log of cases.

  # plot log
  ac_visits_cases_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~change_log_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(lm(change_log_cases_by_pop ~ total_visits_per_capita, ac_visits_cases_change)), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County (only non-zero starting cases)")
summary(ac_visits_cases_change_log_model)
## 
## Call:
## lm(formula = change_log_cases_by_pop ~ total_visits_per_capita, 
##     data = ac_visits_cases_change)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9301 -0.3215 -0.0451  0.3504  1.6460 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             0.281969   0.372242   0.757   0.4528  
## total_visits_per_capita 0.023079   0.008786   2.627   0.0118 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5648 on 44 degrees of freedom
## Multiple R-squared:  0.1356, Adjusted R-squared:  0.1159 
## F-statistic:   6.9 on 1 and 44 DF,  p-value: 0.01181
  ac_visits_cases_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~change_log_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County (only non-zero starting cases)")
  summary(ac_wt_visits_cases_change_log_model)
## 
## Call:
## lm(formula = change_log_cases_by_pop ~ total_weighted_visits_per_capita, 
##     data = ac_visits_cases_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.51894 -0.29000  0.02007  0.34812  1.32806 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                       0.07204    0.45236   0.159   0.8742  
## total_weighted_visits_per_capita  4.16422    1.59204   2.616   0.0122 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5651 on 44 degrees of freedom
## Multiple R-squared:  0.1346, Adjusted R-squared:  0.1149 
## F-statistic: 6.842 on 1 and 44 DF,  p-value: 0.01215
  ac_visits_cases_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~change_log_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visits_cases_change_log_model_hours), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model_hours)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County (only non-zero starting cases)")
  summary(ac_visits_cases_change_log_model_hours)
## 
## Call:
## lm(formula = change_log_cases_by_pop ~ total_visit_hours_per_capita, 
##     data = ac_visits_cases_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.34918 -0.37864  0.03592  0.29945  1.46259 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -0.97383    0.58011  -1.679 0.100298    
## total_visit_hours_per_capita  0.05472    0.01424   3.842 0.000388 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5256 on 44 degrees of freedom
## Multiple R-squared:  0.2512, Adjusted R-squared:  0.2342 
## F-statistic: 14.76 on 1 and 44 DF,  p-value: 0.000388
  ac_visits_cases_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~change_log_cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visits_cases_change_log_model_hours), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model_hours)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County (only non-zero starting cases)")
  summary(ac_wt_visits_cases_change_log_model_hours)
## 
## Call:
## lm(formula = change_log_cases_by_pop ~ total_weighted_visit_hours_per_capita, 
##     data = ac_visits_cases_change)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3357 -0.3702  0.0484  0.3381  1.7101 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                             1.1577     0.2111   5.484 1.92e-06 ***
## total_weighted_visit_hours_per_capita   0.2143     0.5300   0.404    0.688    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6063 on 44 degrees of freedom
## Multiple R-squared:  0.003704,   Adjusted R-squared:  -0.01894 
## F-statistic: 0.1636 on 1 and 44 DF,  p-value: 0.6879

Again, really interesting.

Comparison for change from baseline per person

Trying with 1 week later, 16 day lag, 0.0004 baseline per person

  case_weeks_past <- 1
    visits_lag <- 16
    baseline_cases_by_pop  <- 0.0004
    min_baseline_cases <- 10
    
    ac_cases_cutoff <- ac_cases_zip %>% 
      filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
      filter(cases_by_pop >= baseline_cases_by_pop) %>%
      mutate(log_cases_by_pop = log(cases_by_pop))
    
    zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
  
  # get the change in cases and total visit hours in the time period of interest
    later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visit_hours_per_capita_v2 <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  
    for (i in 1:length(zipcodes_in_cutoff)) {
      curr_zip <- zipcodes_in_cutoff[i]
      curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
    # first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well 
      change_cases_curr = NA
      change_log_cases_curr = NA
      total_visits_curr = NA
      total_visits_per_capita_curr = NA
      total_visit_hours_per_capita_curr = NA
      total_weighted_visit_hours_per_capita_curr = NA
      total_weighted_visits_per_capita_curr = NA
      total_weighted_visit_hours_per_capita_v2_curr = NA
      if ((max(curr_vals$date) >= curr_vals$date[1] + case_weeks_past*7) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + case_weeks_past*7 - visits_lag) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - visits_lag)) {
      # change in cases an amount of time later
      change_cases_curr = curr_vals$cases_by_pop[case_weeks_past*7 + 1] - curr_vals$cases_by_pop[1]
      change_log_cases_curr = curr_vals$log_cases_by_pop[case_weeks_past*7 + 1] - curr_vals$log_cases_by_pop[1]
    # get the total visits in the time range of interest
      visits_curr = ac_weighted_visits_zip %>% 
        filter(zipcode == curr_zip) %>% 
        filter(date >= (curr_vals$date[1] - visits_lag) & date < (curr_vals$date[case_weeks_past*7 + 1] - visits_lag)) %>% 
        summarize(total_visits_per_capita = sum(visits_per_capita),
            total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
            total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
            total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
            total_visit_hours_per_capita = sum(visit_hours_per_capita),
            total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
            total_visits = sum(total_visits))
      total_visits_curr <- visits_curr$total_visits
      total_visits_per_capita_curr = visits_curr$total_visits_per_capita
      total_visit_hours_per_capita_curr = visits_curr$total_visit_hours_per_capita
      total_weighted_visit_hours_per_capita_curr = visits_curr$total_weighted_visit_hours_per_capita
      total_weighted_visits_per_capita_curr = visits_curr$total_weighted_visits_per_capita
      total_weighted_visit_hours_per_capita_v2_curr = visits_curr$total_weighted_visit_hours_per_capita_v2
      
    }
    later_cases_change[i] = change_cases_curr
    later_log_cases_change[i] = change_log_cases_curr
    total_visits[i] = total_visits_curr
    total_visits_per_capita[i] = total_visits_per_capita_curr
    total_visit_hours_per_capita[i] = total_visit_hours_per_capita_curr
    total_weighted_visits_per_capita[i] = total_weighted_visits_per_capita_curr
    total_weighted_visit_hours_per_capita[i] = total_weighted_visit_hours_per_capita_curr
    total_weighted_visit_hours_per_capita_v2[i] = total_weighted_visit_hours_per_capita_v2_curr
  }
  
  # combine
  ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, total_visits_per_capita, total_visit_hours_per_capita, total_weighted_visits_per_capita, total_weighted_visit_hours_per_capita, total_weighted_visit_hours_per_capita_v2, stringsAsFactors = F) %>%
    filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
    rename(zipcode = zipcodes_in_cutoff) %>%
    left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip))
    
# get linear model values
  ac_visits_cases_change_model <- lm(later_cases_change ~ total_visits_per_capita, ac_data_change)
  ac_wt_visits_cases_change_model <- lm(later_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
  ac_visit_hours_cases_change_model <- lm(later_cases_change ~ total_visit_hours_per_capita, ac_data_change)
  ac_wt_visit_hours_cases_change_model <- lm(later_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
  ac_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_visits_per_capita, ac_data_change)
  ac_wt_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
  ac_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_visit_hours_per_capita, ac_data_change)
  ac_wt_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)


  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_visits_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_visits_per_capita, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -2.490e-04 -1.091e-04 -8.720e-06  1.013e-04  3.475e-04 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             -3.153e-04  1.410e-04  -2.237  0.03107 * 
## total_visits_per_capita  1.097e-04  3.144e-05   3.491  0.00121 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0001452 on 39 degrees of freedom
## Multiple R-squared:  0.238,  Adjusted R-squared:  0.2185 
## F-statistic: 12.18 on 1 and 39 DF,  p-value: 0.001213
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_wt_visits_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_weighted_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.927e-04 -1.227e-04 -2.658e-05  3.731e-05  4.510e-04 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)                      1.010e-04  8.571e-05   1.179    0.246
## total_weighted_visits_per_capita 2.461e-03  2.904e-03   0.847    0.402
## 
## Residual standard error: 0.0001649 on 39 degrees of freedom
## Multiple R-squared:  0.01808,    Adjusted R-squared:  -0.007094 
## F-statistic: 0.7182 on 1 and 39 DF,  p-value: 0.4019
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_visit_hours_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -2.770e-04 -1.057e-04 -2.684e-05  7.257e-05  3.801e-04 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  -9.803e-05  9.840e-05  -0.996  0.32530   
## total_visit_hours_per_capita  5.331e-05  1.898e-05   2.809  0.00772 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0001517 on 39 degrees of freedom
## Multiple R-squared:  0.1683, Adjusted R-squared:  0.147 
## F-statistic: 7.893 on 1 and 39 DF,  p-value: 0.007718
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_wt_visit_hours_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_weighted_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.690e-04 -1.318e-04 -3.093e-05  2.289e-05  4.448e-04 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           1.649e-04  3.774e-05   4.369 8.95e-05 ***
## total_weighted_visit_hours_per_capita 1.130e-04  5.720e-04   0.198    0.844    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0001663 on 39 degrees of freedom
## Multiple R-squared:  0.001,  Adjusted R-squared:  -0.02462 
## F-statistic: 0.03904 on 1 and 39 DF,  p-value: 0.8444

That’s super weird. It doesn’t appear to be as good to weight visits with this metric. Try with change in log of cases.

  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
summary(ac_visits_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40639 -0.15711  0.01299  0.12542  0.45660 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -0.54797    0.21112  -2.596 0.013243 *  
## total_visits_per_capita  0.18899    0.04708   4.014 0.000263 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2175 on 39 degrees of freedom
## Multiple R-squared:  0.2924, Adjusted R-squared:  0.2742 
## F-statistic: 16.11 on 1 and 39 DF,  p-value: 0.0002626
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_wt_visits_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32882 -0.18310 -0.05612  0.11291  0.63530 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        0.1634     0.1328   1.231    0.226
## total_weighted_visits_per_capita   4.4415     4.4981   0.987    0.330
## 
## Residual standard error: 0.2554 on 39 degrees of freedom
## Multiple R-squared:  0.02439,    Adjusted R-squared:  -0.0006261 
## F-statistic: 0.975 on 1 and 39 DF,  p-value: 0.3295
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_visit_hours_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44799 -0.16073 -0.00480  0.06846  0.54282 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  -0.16887    0.14975  -1.128  0.26636   
## total_visit_hours_per_capita  0.09086    0.02888   3.146  0.00316 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2309 on 39 degrees of freedom
## Multiple R-squared:  0.2024, Adjusted R-squared:  0.182 
## F-statistic: 9.899 on 1 and 39 DF,  p-value: 0.003163
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_wt_visit_hours_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28630 -0.20207 -0.06042  0.09037  0.62444 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            0.27962    0.05865   4.768  2.6e-05 ***
## total_weighted_visit_hours_per_capita  0.18381    0.88883   0.207    0.837    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2584 on 39 degrees of freedom
## Multiple R-squared:  0.001095,   Adjusted R-squared:  -0.02452 
## F-statistic: 0.04277 on 1 and 39 DF,  p-value: 0.8372

That’s very odd, opposite of what we would expect. Maybe it’s something about having a very short time frame to look at cases in, and how the weighting by area makes the “weighted visits” number be very low? What if we look farther out in time, like 4 weeks past?

  case_weeks_past <- 4
    visits_lag <- 16
    baseline_cases_by_pop  <- 0.0004
    min_baseline_cases <- 10
    
    ac_cases_cutoff <- ac_cases_zip %>% 
      filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
      filter(cases_by_pop >= baseline_cases_by_pop) %>%
      mutate(log_cases_by_pop = log(cases_by_pop))
    
    zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
  
  # get the change in cases and total visit hours in the time period of interest
    later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visit_hours_per_capita_v2 <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  
    for (i in 1:length(zipcodes_in_cutoff)) {
      curr_zip <- zipcodes_in_cutoff[i]
      curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
    # first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well 
      change_cases_curr = NA
      change_log_cases_curr = NA
      total_visits_curr = NA
      total_visits_per_capita_curr = NA
      total_visit_hours_per_capita_curr = NA
      total_weighted_visit_hours_per_capita_curr = NA
      total_weighted_visits_per_capita_curr = NA
      total_weighted_visit_hours_per_capita_v2_curr = NA
      if ((max(curr_vals$date) >= curr_vals$date[1] + case_weeks_past*7) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + case_weeks_past*7 - visits_lag) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - visits_lag)) {
      # change in cases an amount of time later
      change_cases_curr = curr_vals$cases_by_pop[case_weeks_past*7 + 1] - curr_vals$cases_by_pop[1]
      change_log_cases_curr = curr_vals$log_cases_by_pop[case_weeks_past*7 + 1] - curr_vals$log_cases_by_pop[1]
    # get the total visits in the time range of interest
      visits_curr = ac_weighted_visits_zip %>% 
        filter(zipcode == curr_zip) %>% 
        filter(date >= (curr_vals$date[1] - visits_lag) & date < (curr_vals$date[case_weeks_past*7 + 1] - visits_lag)) %>% 
        summarize(total_visits_per_capita = sum(visits_per_capita),
            total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
            total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
            total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
            total_visit_hours_per_capita = sum(visit_hours_per_capita),
            total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
            total_visits = sum(total_visits))
      total_visits_curr <- visits_curr$total_visits
      total_visits_per_capita_curr = visits_curr$total_visits_per_capita
      total_visit_hours_per_capita_curr = visits_curr$total_visit_hours_per_capita
      total_weighted_visit_hours_per_capita_curr = visits_curr$total_weighted_visit_hours_per_capita
      total_weighted_visits_per_capita_curr = visits_curr$total_weighted_visits_per_capita
      total_weighted_visit_hours_per_capita_v2_curr = visits_curr$total_weighted_visit_hours_per_capita_v2
      
    }
    later_cases_change[i] = change_cases_curr
    later_log_cases_change[i] = change_log_cases_curr
    total_visits[i] = total_visits_curr
    total_visits_per_capita[i] = total_visits_per_capita_curr
    total_visit_hours_per_capita[i] = total_visit_hours_per_capita_curr
    total_weighted_visits_per_capita[i] = total_weighted_visits_per_capita_curr
    total_weighted_visit_hours_per_capita[i] = total_weighted_visit_hours_per_capita_curr
    total_weighted_visit_hours_per_capita_v2[i] = total_weighted_visit_hours_per_capita_v2_curr
  }
  
  # combine
  ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, total_visits_per_capita, total_visit_hours_per_capita, total_weighted_visits_per_capita, total_weighted_visit_hours_per_capita, total_weighted_visit_hours_per_capita_v2, stringsAsFactors = F) %>%
    filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
    rename(zipcode = zipcodes_in_cutoff) %>%
    left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip))
    
# get linear model values
  ac_visits_cases_change_model <- lm(later_cases_change ~ total_visits_per_capita, ac_data_change)
  ac_wt_visits_cases_change_model <- lm(later_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
  ac_visit_hours_cases_change_model <- lm(later_cases_change ~ total_visit_hours_per_capita, ac_data_change)
  ac_wt_visit_hours_cases_change_model <- lm(later_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
  ac_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_visits_per_capita, ac_data_change)
  ac_wt_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
  ac_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_visit_hours_per_capita, ac_data_change)
  ac_wt_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)


  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_visits_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_visits_per_capita, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.866e-04 -4.877e-04 -6.695e-05  1.951e-04  2.348e-03 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)             -4.693e-04  8.482e-04  -0.553    0.584
## total_visits_per_capita  6.981e-05  4.660e-05   1.498    0.143
## 
## Residual standard error: 0.0006874 on 34 degrees of freedom
## Multiple R-squared:  0.06192,    Adjusted R-squared:  0.03433 
## F-statistic: 2.244 on 1 and 34 DF,  p-value: 0.1433
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_wt_visits_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_weighted_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.406e-04 -5.117e-04 -9.538e-05  3.163e-04  2.164e-03 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      -0.0001264  0.0004307  -0.293   0.7710  
## total_weighted_visits_per_capita  0.0079184  0.0035984   2.201   0.0347 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000664 on 34 degrees of freedom
## Multiple R-squared:  0.1247, Adjusted R-squared:  0.09893 
## F-statistic: 4.842 on 1 and 34 DF,  p-value: 0.03466
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_visit_hours_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0008171 -0.0004473 -0.0001271  0.0001614  0.0023696 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)
## (Intercept)                  -3.220e-04  8.208e-04  -0.392    0.697
## total_visit_hours_per_capita  5.596e-05  4.091e-05   1.368    0.180
## 
## Residual standard error: 0.000691 on 34 degrees of freedom
## Multiple R-squared:  0.05216,    Adjusted R-squared:  0.02428 
## F-statistic: 1.871 on 1 and 34 DF,  p-value: 0.1803
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_wt_visit_hours_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_weighted_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0007129 -0.0004636 -0.0001606  0.0001911  0.0022268 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                           0.0007634  0.0002335   3.270  0.00247 **
## total_weighted_visit_hours_per_capita 0.0001481  0.0011325   0.131  0.89675   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0007095 on 34 degrees of freedom
## Multiple R-squared:  0.0005025,  Adjusted R-squared:  -0.02889 
## F-statistic: 0.01709 on 1 and 34 DF,  p-value: 0.8967
    ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
    summary(ac_visits_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86387 -0.31930  0.01055  0.17622  1.27021 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             -0.35820    0.58773  -0.609   0.5463  
## total_visits_per_capita  0.06890    0.03229   2.134   0.0402 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4763 on 34 degrees of freedom
## Multiple R-squared:  0.1181, Adjusted R-squared:  0.09216 
## F-statistic: 4.553 on 1 and 34 DF,  p-value: 0.04015
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_wt_visits_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68028 -0.40716 -0.00591  0.26808  1.10176 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                        0.1722     0.3038   0.567   0.5746  
## total_weighted_visits_per_capita   6.1566     2.5378   2.426   0.0207 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4683 on 34 degrees of freedom
## Multiple R-squared:  0.1476, Adjusted R-squared:  0.1225 
## F-statistic: 5.885 on 1 and 34 DF,  p-value: 0.02072
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_visit_hours_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78463 -0.39103 -0.01861  0.27826  1.28838 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                  -0.18531    0.57335  -0.323   0.7485  
## total_visit_hours_per_capita  0.05385    0.02858   1.884   0.0681 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4826 on 34 degrees of freedom
## Multiple R-squared:  0.09456,    Adjusted R-squared:  0.06793 
## F-statistic: 3.551 on 1 and 34 DF,  p-value: 0.06809
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_wt_visit_hours_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.67646 -0.35578 -0.08208  0.22805  1.15017 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            0.86776    0.16687   5.200 9.44e-06 ***
## total_weighted_visit_hours_per_capita  0.09365    0.80941   0.116    0.909    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5071 on 34 degrees of freedom
## Multiple R-squared:  0.0003935,  Adjusted R-squared:  -0.02901 
## F-statistic: 0.01339 on 1 and 34 DF,  p-value: 0.9086

Now weighted visits do better, but visit hours do worse. Hmm.

Comparison for change from baseline

Try 10 as baseline, 1 week past, 14 day lag.

  case_weeks_past <- 1
    visits_lag <- 14
    baseline_cases  <- 10
    min_baseline_cases <- 10
    
    ac_cases_cutoff <- ac_cases_zip %>% 
      filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
      filter(cases >= baseline_cases) %>%
      mutate(log_cases_by_pop = log(cases_by_pop))
    
    zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
  
  # get the change in cases and total visit hours in the time period of interest
    later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visit_hours_per_capita_v2 <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  
    for (i in 1:length(zipcodes_in_cutoff)) {
      curr_zip <- zipcodes_in_cutoff[i]
      curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
    # first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well 
      change_cases_curr = NA
      change_log_cases_curr = NA
      total_visits_curr = NA
      total_visits_per_capita_curr = NA
      total_visit_hours_per_capita_curr = NA
      total_weighted_visit_hours_per_capita_curr = NA
      total_weighted_visits_per_capita_curr = NA
      total_weighted_visit_hours_per_capita_v2_curr = NA
      if ((max(curr_vals$date) >= curr_vals$date[1] + case_weeks_past*7) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + case_weeks_past*7 - visits_lag) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - visits_lag)) {
      # change in cases an amount of time later
      change_cases_curr = curr_vals$cases_by_pop[case_weeks_past*7 + 1] - curr_vals$cases_by_pop[1]
      change_log_cases_curr = curr_vals$log_cases_by_pop[case_weeks_past*7 + 1] - curr_vals$log_cases_by_pop[1]
    # get the total visits in the time range of interest
      visits_curr = ac_weighted_visits_zip %>% 
        filter(zipcode == curr_zip) %>% 
        filter(date >= (curr_vals$date[1] - visits_lag) & date < (curr_vals$date[case_weeks_past*7 + 1] - visits_lag)) %>% 
        summarize(total_visits_per_capita = sum(visits_per_capita),
            total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
            total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
            total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
            total_visit_hours_per_capita = sum(visit_hours_per_capita),
            total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
            total_visits = sum(total_visits))
      total_visits_curr <- visits_curr$total_visits
      total_visits_per_capita_curr = visits_curr$total_visits_per_capita
      total_visit_hours_per_capita_curr = visits_curr$total_visit_hours_per_capita
      total_weighted_visit_hours_per_capita_curr = visits_curr$total_weighted_visit_hours_per_capita
      total_weighted_visits_per_capita_curr = visits_curr$total_weighted_visits_per_capita
      total_weighted_visit_hours_per_capita_v2_curr = visits_curr$total_weighted_visit_hours_per_capita_v2
      
    }
    later_cases_change[i] = change_cases_curr
    later_log_cases_change[i] = change_log_cases_curr
    total_visits[i] = total_visits_curr
    total_visits_per_capita[i] = total_visits_per_capita_curr
    total_visit_hours_per_capita[i] = total_visit_hours_per_capita_curr
    total_weighted_visits_per_capita[i] = total_weighted_visits_per_capita_curr
    total_weighted_visit_hours_per_capita[i] = total_weighted_visit_hours_per_capita_curr
    total_weighted_visit_hours_per_capita_v2[i] = total_weighted_visit_hours_per_capita_v2_curr
  }
  
  # combine
  ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, total_visits_per_capita, total_visit_hours_per_capita, total_weighted_visits_per_capita, total_weighted_visit_hours_per_capita, total_weighted_visit_hours_per_capita_v2, stringsAsFactors = F) %>%
    filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
    rename(zipcode = zipcodes_in_cutoff) %>%
    left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip))
    
# get linear model values
  ac_visits_cases_change_model <- lm(later_cases_change ~ total_visits_per_capita, ac_data_change)
  ac_wt_visits_cases_change_model <- lm(later_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
  ac_visit_hours_cases_change_model <- lm(later_cases_change ~ total_visit_hours_per_capita, ac_data_change)
  ac_wt_visit_hours_cases_change_model <- lm(later_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
  ac_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_visits_per_capita, ac_data_change)
  ac_wt_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
  ac_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_visit_hours_per_capita, ac_data_change)
  ac_wt_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)


  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_visits_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_visits_per_capita, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.658e-04 -7.291e-05 -2.233e-05  3.454e-05  3.710e-04 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             -5.837e-06  6.675e-05  -0.087   0.9308  
## total_visits_per_capita  3.465e-05  1.334e-05   2.598   0.0132 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0001172 on 39 degrees of freedom
## Multiple R-squared:  0.1475, Adjusted R-squared:  0.1257 
## F-statistic: 6.749 on 1 and 39 DF,  p-value: 0.01316
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_wt_visits_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_weighted_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.624e-04 -9.873e-05 -4.120e-06  4.228e-05  3.867e-04 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      1.559e-04  6.019e-05   2.590   0.0134 *
## total_weighted_visits_per_capita 1.706e-04  1.913e-03   0.089   0.9294  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0001269 on 39 degrees of freedom
## Multiple R-squared:  0.000204,   Adjusted R-squared:  -0.02543 
## F-statistic: 0.007958 on 1 and 39 DF,  p-value: 0.9294
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_visit_hours_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.905e-04 -8.660e-05 -2.140e-06  4.659e-05  3.676e-04 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                  -9.698e-06  8.402e-05  -0.115   0.9087  
## total_visit_hours_per_capita  3.193e-05  1.532e-05   2.084   0.0438 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0001204 on 39 degrees of freedom
## Multiple R-squared:  0.1002, Adjusted R-squared:  0.07711 
## F-statistic: 4.342 on 1 and 39 DF,  p-value: 0.04378
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_wt_visit_hours_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_weighted_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.686e-04 -7.892e-05 -1.329e-05  5.063e-05  3.795e-04 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            1.755e-04  2.954e-05   5.940 6.27e-07
## total_weighted_visit_hours_per_capita -2.987e-04  4.520e-04  -0.661    0.513
##                                          
## (Intercept)                           ***
## total_weighted_visit_hours_per_capita    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0001262 on 39 degrees of freedom
## Multiple R-squared:  0.01108,    Adjusted R-squared:  -0.01428 
## F-statistic: 0.4368 on 1 and 39 DF,  p-value: 0.5125
    ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
    summary(ac_visits_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45833 -0.14879 -0.01571  0.12872  0.60262 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.2523     0.1431  -1.763   0.0857 .  
## total_visits_per_capita   0.1435     0.0286   5.017 1.19e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2512 on 39 degrees of freedom
## Multiple R-squared:  0.3922, Adjusted R-squared:  0.3766 
## F-statistic: 25.17 on 1 and 39 DF,  p-value: 1.188e-05
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_wt_visits_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44940 -0.24265 -0.08562  0.14159  0.64862 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                        0.4000     0.1527   2.620   0.0125 *
## total_weighted_visits_per_capita   1.2839     4.8530   0.265   0.7927  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3219 on 39 degrees of freedom
## Multiple R-squared:  0.001791,   Adjusted R-squared:  -0.0238 
## F-statistic: 0.06999 on 1 and 39 DF,  p-value: 0.7927
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_visit_hours_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52819 -0.18997 -0.05996  0.14897  0.60311 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  -0.10956    0.20611  -0.532  0.59806   
## total_visit_hours_per_capita  0.10248    0.03759   2.727  0.00954 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2953 on 39 degrees of freedom
## Multiple R-squared:  0.1601, Adjusted R-squared:  0.1386 
## F-statistic: 7.434 on 1 and 39 DF,  p-value: 0.00954
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_wt_visit_hours_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47149 -0.20620 -0.07053  0.13658  0.63519 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            0.50176    0.07418   6.764 4.52e-08 ***
## total_weighted_visit_hours_per_capita -1.30628    1.13489  -1.151    0.257    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3169 on 39 degrees of freedom
## Multiple R-squared:  0.03285,    Adjusted R-squared:  0.008056 
## F-statistic: 1.325 on 1 and 39 DF,  p-value: 0.2567

Super weird. Try again with 4 weeks past.

  case_weeks_past <- 4
    visits_lag <- 14
    baseline_cases  <- 10
    min_baseline_cases <- 10
    
    ac_cases_cutoff <- ac_cases_zip %>% 
      filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
      filter(cases >= baseline_cases) %>%
      mutate(log_cases_by_pop = log(cases_by_pop))
    
    zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
  
  # get the change in cases and total visit hours in the time period of interest
    later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visit_hours_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visits_per_capita <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
    total_weighted_visit_hours_per_capita_v2 <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  
    for (i in 1:length(zipcodes_in_cutoff)) {
      curr_zip <- zipcodes_in_cutoff[i]
      curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
    # first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well 
      change_cases_curr = NA
      change_log_cases_curr = NA
      total_visits_curr = NA
      total_visits_per_capita_curr = NA
      total_visit_hours_per_capita_curr = NA
      total_weighted_visit_hours_per_capita_curr = NA
      total_weighted_visits_per_capita_curr = NA
      total_weighted_visit_hours_per_capita_v2_curr = NA
      if ((max(curr_vals$date) >= curr_vals$date[1] + case_weeks_past*7) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + case_weeks_past*7 - visits_lag) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - visits_lag)) {
      # change in cases an amount of time later
      change_cases_curr = curr_vals$cases_by_pop[case_weeks_past*7 + 1] - curr_vals$cases_by_pop[1]
      change_log_cases_curr = curr_vals$log_cases_by_pop[case_weeks_past*7 + 1] - curr_vals$log_cases_by_pop[1]
    # get the total visits in the time range of interest
      visits_curr = ac_weighted_visits_zip %>% 
        filter(zipcode == curr_zip) %>% 
        filter(date >= (curr_vals$date[1] - visits_lag) & date < (curr_vals$date[case_weeks_past*7 + 1] - visits_lag)) %>% 
        summarize(total_visits_per_capita = sum(visits_per_capita),
            total_weighted_visits_per_capita = sum(weighted_visits_per_capita),
            total_weighted_visit_hours_per_capita = sum(weighted_visit_hours_per_capita),
            total_weighted_visit_hours_per_capita_v2 = sum(weighted_visit_hours_per_capita_v2),
            total_visit_hours_per_capita = sum(visit_hours_per_capita),
            total_visits_per_area_per_capita = sum(visits_per_area_per_capita),
            total_visits = sum(total_visits))
      total_visits_curr <- visits_curr$total_visits
      total_visits_per_capita_curr = visits_curr$total_visits_per_capita
      total_visit_hours_per_capita_curr = visits_curr$total_visit_hours_per_capita
      total_weighted_visit_hours_per_capita_curr = visits_curr$total_weighted_visit_hours_per_capita
      total_weighted_visits_per_capita_curr = visits_curr$total_weighted_visits_per_capita
      total_weighted_visit_hours_per_capita_v2_curr = visits_curr$total_weighted_visit_hours_per_capita_v2
      
    }
    later_cases_change[i] = change_cases_curr
    later_log_cases_change[i] = change_log_cases_curr
    total_visits[i] = total_visits_curr
    total_visits_per_capita[i] = total_visits_per_capita_curr
    total_visit_hours_per_capita[i] = total_visit_hours_per_capita_curr
    total_weighted_visits_per_capita[i] = total_weighted_visits_per_capita_curr
    total_weighted_visit_hours_per_capita[i] = total_weighted_visit_hours_per_capita_curr
    total_weighted_visit_hours_per_capita_v2[i] = total_weighted_visit_hours_per_capita_v2_curr
  }
  
  # combine
  ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, total_visits_per_capita, total_visit_hours_per_capita, total_weighted_visits_per_capita, total_weighted_visit_hours_per_capita, total_weighted_visit_hours_per_capita_v2, stringsAsFactors = F) %>%
    filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
    rename(zipcode = zipcodes_in_cutoff) %>%
    left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip))
    
# get linear model values
  ac_visits_cases_change_model <- lm(later_cases_change ~ total_visits_per_capita, ac_data_change)
  ac_wt_visits_cases_change_model <- lm(later_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
  ac_visit_hours_cases_change_model <- lm(later_cases_change ~ total_visit_hours_per_capita, ac_data_change)
  ac_wt_visit_hours_cases_change_model <- lm(later_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)
  ac_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_visits_per_capita, ac_data_change)
  ac_wt_visits_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visits_per_capita, ac_data_change)
  ac_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_visit_hours_per_capita, ac_data_change)
  ac_wt_visit_hours_cases_change_log_model <- lm(later_log_cases_change ~ total_weighted_visit_hours_per_capita, ac_data_change)


  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_visits_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_visits_per_capita, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0007086 -0.0003710 -0.0001228  0.0003137  0.0024059 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             -3.333e-04  5.859e-04  -0.569   0.5730  
## total_visits_per_capita  5.941e-05  3.226e-05   1.842   0.0738 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0005926 on 36 degrees of freedom
## Multiple R-squared:  0.08611,    Adjusted R-squared:  0.06073 
## F-statistic: 3.392 on 1 and 36 DF,  p-value: 0.07376
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_wt_visits_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_weighted_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0007431 -0.0004466 -0.0002294  0.0003675  0.0022658 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)                      0.0002792  0.0003966   0.704    0.486
## total_weighted_visits_per_capita 0.0039268  0.0033373   1.177    0.247
## 
## Residual standard error: 0.0006083 on 36 degrees of freedom
## Multiple R-squared:  0.03703,    Adjusted R-squared:  0.01028 
## F-statistic: 1.384 on 1 and 36 DF,  p-value: 0.2471
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_visit_hours_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0006018 -0.0004484 -0.0001387  0.0003377  0.0024175 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)
## (Intercept)                  -1.435e-04  7.253e-04  -0.198    0.844
## total_visit_hours_per_capita  4.359e-05  3.580e-05   1.217    0.231
## 
## Residual standard error: 0.0006075 on 36 degrees of freedom
## Multiple R-squared:  0.03954,    Adjusted R-squared:  0.01286 
## F-statistic: 1.482 on 1 and 36 DF,  p-value: 0.2314
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in cases per person'), title = "Alameda County")
  summary(ac_wt_visit_hours_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ total_weighted_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0006331 -0.0004378 -0.0002010  0.0003476  0.0022717 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            0.0008356  0.0002156   3.876 0.000433
## total_weighted_visit_hours_per_capita -0.0005782  0.0010575  -0.547 0.587950
##                                          
## (Intercept)                           ***
## total_weighted_visit_hours_per_capita    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0006174 on 36 degrees of freedom
## Multiple R-squared:  0.008234,   Adjusted R-squared:  -0.01931 
## F-statistic: 0.2989 on 1 and 36 DF,  p-value: 0.588
    ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visits_per_capita, y = fitted(ac_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
    summary(ac_visits_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9609 -0.2659 -0.0020  0.2350  1.1300 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -1.02253    0.48775  -2.096   0.0431 *  
## total_visits_per_capita  0.12152    0.02685   4.526 6.34e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4933 on 36 degrees of freedom
## Multiple R-squared:  0.3626, Adjusted R-squared:  0.3449 
## F-statistic: 20.48 on 1 and 36 DF,  p-value: 6.339e-05
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visits_per_capita, y = fitted(ac_wt_visits_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visits_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visits per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_wt_visits_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9683 -0.4826 -0.1709  0.5479  1.4410 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                        0.8437     0.3993   2.113   0.0416 *
## total_weighted_visits_per_capita   2.7038     3.3599   0.805   0.4263  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6124 on 36 degrees of freedom
## Multiple R-squared:  0.01767,    Adjusted R-squared:  -0.009617 
## F-statistic: 0.6476 on 1 and 36 DF,  p-value: 0.4263
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_visit_hours_per_capita, y = fitted(ac_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visit_hours_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_visit_hours_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8364 -0.3940 -0.1088  0.3823  1.1685 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  -0.66004    0.67153  -0.983  0.33222   
## total_visit_hours_per_capita  0.09044    0.03315   2.728  0.00979 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5625 on 36 degrees of freedom
## Multiple R-squared:  0.1713, Adjusted R-squared:  0.1483 
## F-statistic: 7.442 on 1 and 36 DF,  p-value: 0.009789
  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~total_weighted_visit_hours_per_capita, y = fitted(ac_wt_visit_hours_cases_change_log_model), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_wt_visit_hours_cases_change_log_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total weighted visit hours per person'), yaxis = list(title = 'Change in log(cases per person)'), title = "Alameda County")
  summary(ac_wt_visit_hours_cases_change_log_model)
## 
## Call:
## lm(formula = later_log_cases_change ~ total_weighted_visit_hours_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9984 -0.5318 -0.1327  0.4514  1.3733 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                             1.3629     0.2122   6.422  1.9e-07 ***
## total_weighted_visit_hours_per_capita  -1.1521     1.0409  -1.107    0.276    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6077 on 36 degrees of freedom
## Multiple R-squared:  0.03291,    Adjusted R-squared:  0.006043 
## F-statistic: 1.225 on 1 and 36 DF,  p-value: 0.2757